import urllib
import urllib.parse  # explicit: `import urllib` alone does not load the `parse` submodule

import requests
from bs4 import BeautifulSoup

from getExample import GetHtmlCode

def GetBaidu(baidu_url):
    """Scrape the first three Baidu result pages and append each result's
    resolved (de-redirected) link to text/baidu.txt.

    baidu_url: full Baidu search URL including the encoded query
               (e.g. "http://www.baidu.com/s?wd=<query>").
    Returns None; the links are written to the file as a side effect.
    Returns early (None) if any page fails to download.
    """
    # Baidu paginates with &pn=<offset>, 10 results per page.
    baidu_pages = ["&pn=0", "&pn=10", "&pn=20"]
    links = []
    # One shared session (was created per-result inside the inner loop);
    # a mobile UA makes Baidu serve a simpler, stable page layout.
    session = requests.Session()
    session.headers['User-Agent'] = "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Mobile Safari/537.36"
    # 'with' guarantees the file is closed even on the early return below
    # (the original leaked the handle on that path).
    with open("text/baidu.txt", "a") as baidu:
        for page in baidu_pages:
            content_html = GetHtmlCode(baidu_url + page)
            if content_html is None:
                print("None")
                return None
            soup = BeautifulSoup(content_html, 'html.parser', from_encoding='utf-8')
            # Baidu wraps each organic result title in <h3 class="t">.
            for title in soup.find_all('h3', class_="t"):
                link = title.find('a')['href']
                # Baidu result hrefs are redirect URLs; follow the redirect
                # chain and record the final real URL. Skip links that fail.
                try:
                    real_url = session.get(link).url
                    links.append(real_url)
                    baidu.write(real_url + '\n')
                except Exception as e:
                    print(e)
                    continue
    # return links

def GetSougou(sougou_url):
    """Scrape the first three Sogou result pages and append each result's
    resolved (de-redirected) link to text/sougou.txt.

    sougou_url: full Sogou search URL including the encoded query
                (e.g. "http://www.sogou.com/sogou?query=<query>").
    Returns None; the links are written to the file as a side effect.
    Returns early (None) if any page fails to download.
    """
    # Sogou paginates with &page=<n>.
    sougou_pages = ["&page=1", "&page=2", "&page=3"]
    # One shared session (was created per-result inside the inner loop).
    session = requests.Session()
    session.headers['User-Agent'] = "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Mobile Safari/537.36"
    # 'with' guarantees the file is closed even on the early return below.
    with open("text/sougou.txt", "a+") as sougou:
        for page in sougou_pages:
            content_html = GetHtmlCode(sougou_url + page)
            if content_html is None:
                print("None")
                return None
            soup = BeautifulSoup(content_html, 'html.parser', from_encoding='utf-8')
            # Sogou wraps each organic result title in <h3 class="vrTitle bdtit">.
            for title in soup.find_all('h3', class_="vrTitle bdtit"):
                link = title.find('a')['href']
                # Follow the redirect to the real URL. Guarded like GetBaidu,
                # so one dead link no longer aborts the whole run.
                try:
                    real_url = session.get(link).url
                    print(real_url)
                    sougou.write(real_url + '\n')
                except Exception as e:
                    print(e)
                    continue
    # return list
def GetBiying(biying_url):
    """Scrape the first three Bing (cn.bing.com) result pages and append
    each result link to text/biying.txt.

    biying_url: full Bing search URL including the encoded query
                (e.g. "http://cn.bing.com/search?q=<query>").
    Returns None; the links are written to the file as a side effect.
    Returns early (None) if any page fails to download.
    """
    # Bing paginates with &first=<offset of first result>.
    biying_pages = ["&first=1", "&first=9", "&first=19"]
    # 'with' guarantees the file is closed even on the early return below.
    with open("text/biying.txt", "a+") as biying:
        for page in biying_pages:
            new_url = biying_url + page
            print(new_url)
            content_html = GetHtmlCode(new_url)
            if content_html is None:
                print("None")
                return None
            # (removed: debug dump of the full page HTML)
            soup = BeautifulSoup(content_html, 'html.parser', from_encoding='utf-8')
            # Result titles are <h2> elements; the page also contains
            # non-result <h2>s without an anchor, which used to raise
            # TypeError on ['href'] — skip those instead of crashing.
            for title in soup.find_all('h2'):
                anchor = title.find('a')
                if anchor is None or not anchor.has_attr('href'):
                    continue
                link = anchor['href']
                print(link)
                biying.write(link + '\n')

if __name__ == "__main__":
    # Base search URLs; the URL-encoded query is appended per example.
    BAIDU_BASE = "http://www.baidu.com/s?wd="
    SOUGOU_BASE = "http://www.sogou.com/sogou?query="
    BIYING_BASE = "http://cn.bing.com/search?q="
    # 'with' closes the input file; enumerate replaces the manual counter.
    with open("text/example.txt", "r") as example_list:
        for count, example in enumerate(example_list):
            query = urllib.parse.quote(example.strip())
            # BUG FIX: the original did `baidu_url += query` etc. on shared
            # variables, so every iteration appended the new query onto all
            # previous ones and searched concatenated garbage from the
            # second example onward. Build a fresh URL each time instead.
            baidu_url = BAIDU_BASE + query
            sougou_url = SOUGOU_BASE + query
            biying_url = BIYING_BASE + query
            print(str(count) + " qq" + query)
            # GetBaidu(baidu_url)
            # GetSougou(sougou_url)
            GetBiying(biying_url)